/* 00_MAIN.do                 KTS/DCC/NLB                  yyyy-mm-dd:2026-02-04
----|----1----|----2----|----3----|----4----|----5----|----6----|----7----|----8

This do file creates the main data used in the paper "Estimating 
Intergenerational Returns to Medical Care: New Evidence from At-Risk Newborns" 
written by Damian Clarke, Nicolas Lillo Bustos and Kathya Tapia-Schythe.  
In certain cases this code requires the user-written commands labutil and
personage.
*/

*-------------------------------------------------------------------------------
*--- 0. MACROS  
*-------------------------------------------------------------------------------
* Resets the session and defines the globals (paths, file names, variable
* names, control lists, scalars) and the date tag used by the rest of this
* file. Globals stay visible inside the do files called below.

clear all
set more off
set mem 100m      // only meaningful in Stata 11 or earlier; ignored from 12 on
macro drop _all

* SEED:
global myseed 30081985

* DIRECTORIES
global maindir "" // Set Working Directory
* Stop early with a clear message: with an empty root every path below would
* silently resolve to "/source", "/data", ... and only fail later at -cd-.
if `"$maindir"' == "" {
	display as error "global maindir is empty: set it to the project root directory"
	exit 198
}
global dodir "$maindir/source"
global datamain "$maindir/data"
global rawdata "$datamain/raw"
global dtadir "$datamain/dta"
* -mkdir- creates only the last component of a path, so make the parent first.
* -capture- swallows the error raised when a directory already exists.
cap mkdir "$dtadir"
cap mkdir "$dtadir/DEIS"
*global labeldos "$dodir/labels"
*cap mkdir "$labeldos"
global logdir "$maindir/log"
cap mkdir "$logdir"

* FILES:
* Raw csvs:
global rawnaccsv "$rawdata/DEIS/Nacimientos_1992_2018/NAC_1992_2018.csv"
global rawdefcsv "$rawdata/DEIS/DEF_1990-2018/DEF_1990-2018.csv"

* Base dtas:
global nac_original NAC_1992_2018
global def_original DEF_1990_2018
global eehh_original EEHH_2001_2019
global lastworkdata = "workingdata"

* VARIABLES:
global byear_var ANO_NAC
global dyear_var ANO_DEF
global eyear_var ANO_EGRESO

* VARLISTS:
* The ? wildcards are stored literally and expand when the macro is used as a
* varlist.
global g1blnctrls edmom EDAD_MADRE married doc_aten bregion_?? byear_????
global g2blnctrls edgmom EDAD_ABUELA marriedm doc_atenm bregion_??m byear_????m

* SCALARS:
global powerfactor = 0.25

* INIT:
cls
macro dir

* DIR 
cd "$dtadir/DEIS"

* Create date tag (YYYYMMDD) used to name the log files of each step:
local today = date("$S_DATE", "DMY")
local datetag = string(`today', "%tdCCYYNNDD")
			  
*-------------------------------------------------------------------------------
*--- 1. CSV to DTA   
*-------------------------------------------------------------------------------
* Runs the three 01* do files, each under its own dated text log in $logdir.
* Any log left open is closed first so that -log using- cannot fail on a
* name that is already in use.

* A: BIRTHS DATA:
capture log close _all
log using "$logdir/NAC_dataset_`datetag'", replace text name(NAC_dataset)
do "$dodir/01A_NAC_dataset.do"
log close _all

* B: DEATH DATA:
capture log close _all
log using "$logdir/DEF_dataset_`datetag'", replace text name(DEF_dataset)
do "$dodir/01B_DEF_dataset.do"
log close _all

* C: HOSPITALIZATION DATA:
capture log close _all
log using "$logdir/EEHH_dataset_`datetag'", replace text name(EEHH_dataset)
do "$dodir/01C_EEHH_dataset.do"
* Close this log here, as steps A and B do, rather than relying on the next
* section's -capture log close _all-.
log close _all

*-------------------------------------------------------------------------------
*--- 2. NEW VARIABLES   
*-------------------------------------------------------------------------------
* Runs the two 02* do files in order, each under its own dated text log.
* The log name is the do-file name without its 4-character step prefix
* ("02A_" / "02B_").

* A: BIRTHS DATA MANIPULATION (02A_NAC_NEWVARS): 
/* Relevant datasets that will be used later: 
- Mother birth data
- Birth spacing 
- Small for Gestation Age variables 
- Market Activity Variables 
- Education variables 
- Heaping variables
- Low birthweight indicators
- Birth timing variables
- Mortality variables
- Income variables 
- Mother's income variables 
- Mother's ISAPRE status. 
*/

* B: HOSPITALIZATION DATA MANIPULATION (02B_EEHH_NEWVARS):
/* Relevant datasets that will be used later: 
- Merge EEHH with NAC 
- NEWVARS 
- Yearly and Monthly Days spent in Hospital 
- Aggregation by: Age in Years and Age in Months. 
*/

foreach step in 02A_NAC_NEWVARS 02B_EEHH_NEWVARS {
	* Strip the step prefix: 02A_NAC_NEWVARS -> NAC_NEWVARS
	local logname = substr("`step'", 5, .)
	capture log close _all
	log using "$logdir/`logname'_`datetag'", replace text name(`logname')
	do "$dodir/`step'.do"
	log close _all
}

*-------------------------------------------------------------------------------
*--- 3. MERGE    
*-------------------------------------------------------------------------------
* Runs 03_MERGE.do under its own dated text log; the step name is used for
* the log file, the log name and the do-file name.

local mergestep MERGE
capture log close _all
log using "$logdir/`mergestep'_`datetag'", replace text name(`mergestep')
do "$dodir/03_`mergestep'.do"
log close _all

*-------------------------------------------------------------------------------
*--- 4. Data Corrections 
*-------------------------------------------------------------------------------
* Works on the dataset currently in memory (presumably the one left by
* 03_MERGE.do -- confirm), fixes mother's age and the parents' activity
* indicators, and saves the result as workingdata.dta in the current directory.

*a.- Age
* Flag births from 2007 on whose mother record matched (mrg_mbdata2main==3)
* and whose reported mother's age is between 15 and 45. inrange() is 0 when
* EDAD_MADRE is missing, like the pair of inequalities it stands for.
gen g2smpl = mrg_mbdata2main==3 & inrange(EDAD_MADRE, 15, 45) & ANO_NAC>=2007

* Recompute the mother's age from the two birth dates for the flagged rows.
personage FECHA_NACIMIENTO_SIF_MADRE FECHA_NACIMIENTO_SIF if g2smpl, gen(mother_age) 

* NOTE(review): the 26 cut-off presumably reflects that a mother found in the
* 1992-2018 birth records cannot be older than 26 -- confirm.
replace EDAD_MADRE = mother_age if g2smpl & EDAD_MADRE>26

* Keep ages of 10 or more; missing ages are kept too, since missing compares
* as larger than any number.
keep if EDAD_MADRE>=10 // 1 corrected observation
drop g2smpl

*b.- Employment
* Recode the string activity codes into the numeric indicators, tabulating
* before and after as a check.
* NOTE(review): the code set to missing is "3" for the father but "9" for the
* mother -- confirm against the codebook that this asymmetry is intended.
tab ACTIV_PADRE activ_p, m
replace activ_p = 0 if ACTIV_PADRE=="0"
replace activ_p = 1 if inlist(ACTIV_PADRE, "1", "2")
replace activ_p = . if inlist(ACTIV_PADRE, "3", "")
tab ACTIV_PADRE activ_p, m

tab ACTIV_MADRE activ_m, m
replace activ_m = 0 if ACTIV_MADRE=="0"
replace activ_m = 1 if inlist(ACTIV_MADRE, "1", "2")
replace activ_m = . if inlist(ACTIV_MADRE, "9", "")
tab ACTIV_MADRE activ_m, m

*c.- Save dataset:
* Replace any existing dataset notes with a single timestamp note.
compress
notes drop _dta
label data "Working dataset $S_DATE"
notes _dta: Last modification timestamp: $S_DATE at $S_TIME
save "workingdata.dta", replace
